## [1] "codeindyr" "code" "countryname" "year" "indicator"
## [6] "estimate" "stddev" "nsource" "pctrank" "pctranklower"
## [11] "pctrankupper"
## # A tibble: 6 × 5
## countryname code year wgi_cc wgi_ge
## <chr> <chr> <dbl> <chr> <chr>
## 1 Afghanistan AFG 2022 -1.1836843490600586 -1.8800348043441772
## 2 Albania ALB 2022 -0.40818944573402405 6.454053521156311E-2
## 3 Algeria DZA 2022 -0.63804107904434204 -0.51349949836730957
## 4 American Samoa ASM 2022 1.2702723741531372 0.66762912273406982
## 5 Andorra ADO 2022 1.2702723741531372 1.4952607154846191
## 6 Angola AGO 2022 -0.61237573623657227 -1.0260342359542847
## [1] "Country Name" "Country Code" "Series Name" "Series Code"
## [5] "2022 [YR2022]"
## Country Name Country Code
## 1 Afghanistan AFG
## 2 Afghanistan AFG
## 3 Afghanistan AFG
## 4 Afghanistan AFG
## 5 American Samoa ASM
## 6 American Samoa ASM
## Series Name
## 1 Carbon dioxide (CO2) emissions excluding LULUCF per capita (t CO2e/capita)
## 2 GDP per capita (constant 2015 US$)
## 3 Energy use (kg of oil equivalent per capita)
## 4 Urban population (% of total population)
## 5 Carbon dioxide (CO2) emissions excluding LULUCF per capita (t CO2e/capita)
## 6 GDP per capita (constant 2015 US$)
## Series Code 2022 [YR2022]
## 1 EN.GHG.CO2.PC.CE.AR5 0.20355189041619276
## 2 NY.GDP.PCAP.KD 377.66562708070467
## 3 EG.USE.PCAP.KG.OE ..
## 4 SP.URB.TOTL.IN.ZS 26.616
## 5 EN.GHG.CO2.PC.CE.AR5 0.0020685945968309132
## 6 NY.GDP.PCAP.KD 13709.097489547281
## 'data.frame': 865 obs. of 5 variables:
## $ Country Name : chr "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
## $ Country Code : chr "AFG" "AFG" "AFG" "AFG" ...
## $ Series Name : chr "Carbon dioxide (CO2) emissions excluding LULUCF per capita (t CO2e/capita)" "GDP per capita (constant 2015 US$)" "Energy use (kg of oil equivalent per capita)" "Urban population (% of total population)" ...
## $ Series Code : chr "EN.GHG.CO2.PC.CE.AR5" "NY.GDP.PCAP.KD" "EG.USE.PCAP.KG.OE" "SP.URB.TOTL.IN.ZS" ...
## $ 2022 [YR2022]: chr "0.20355189041619276" "377.66562708070467" ".." "26.616" ...
## # A tibble: 6 × 6
## country_name country_code carbon_dioxide_co2_e…¹ gdp_per_capita_const…²
## <chr> <chr> <chr> <chr>
## 1 Afghanistan AFG 0.20355189041619276 377.66562708070467
## 2 American Samoa ASM 0.0020685945968309132 13709.097489547281
## 3 Antigua and Barbuda ATG 3.3013787160706594 17456.872224310424
## 4 Aruba ABW 4.6845587550088537 29917.128028704523
## 5 Azerbaijan AZE 3.8875713436608024 5599.5852510053855
## 6 Bangladesh BGD 0.73252044425188623 1804.3493461776036
## # ℹ abbreviated names:
## # ¹carbon_dioxide_co2_emissions_excluding_lulucf_per_capita_t_co2e_capita,
## # ²gdp_per_capita_constant_2015_us
## # ℹ 2 more variables: energy_use_kg_of_oil_equivalent_per_capita <chr>,
## # urban_population_percent_of_total_population <chr>
## # A tibble: 6 × 6
## country code co2_pc PBI_pc energy_pc urban_pop
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Afghanistan AFG 0.20355189041619276 377.66562… .. 26.616
## 2 American Samoa ASM 0.0020685945968309132 13709.097… .. 87.196
## 3 Antigua and Barbuda ATG 3.3013787160706594 17456.872… .. 24.346
## 4 Aruba ABW 4.6845587550088537 29917.128… .. 44.052
## 5 Azerbaijan AZE 3.8875713436608024 5599.5852… 1714.037… 57.17
## 6 Bangladesh BGD 0.73252044425188623 1804.3493… 297.1183… 39.711
## tibble [215 × 6] (S3: tbl_df/tbl/data.frame)
## $ country : chr [1:215] "Afghanistan" "American Samoa" "Antigua and Barbuda" "Aruba" ...
## $ code : chr [1:215] "AFG" "ASM" "ATG" "ABW" ...
## $ co2_pc : chr [1:215] "0.20355189041619276" "0.0020685945968309132" "3.3013787160706594" "4.6845587550088537" ...
## $ PBI_pc : chr [1:215] "377.66562708070467" "13709.097489547281" "17456.872224310424" "29917.128028704523" ...
## $ energy_pc: chr [1:215] ".." ".." ".." ".." ...
## $ urban_pop: chr [1:215] "26.616" "87.196" "24.346" "44.052" ...
## tibble [215 × 6] (S3: tbl_df/tbl/data.frame)
## $ country : chr [1:215] "Afghanistan" "American Samoa" "Antigua and Barbuda" "Aruba" ...
## $ code : chr [1:215] "AFG" "ASM" "ATG" "ABW" ...
## $ co2_pc : num [1:215] 0.20355 0.00207 3.30138 4.68456 3.88757 ...
## $ PBI_pc : num [1:215] 378 13709 17457 29917 5600 ...
## $ energy_pc: num [1:215] NA NA NA NA 1714 ...
## $ urban_pop: num [1:215] 26.6 87.2 24.3 44.1 57.2 ...
## country code co2_pc PBI_pc energy_pc urban_pop
## 0 0 14 10 67 2
## Rows: 136
## Columns: 10
## $ country <chr> "Azerbaijan", "Bangladesh", "Belgium", "Bosnia and Herzego…
## $ code <chr> "AZE", "BGD", "BEL", "BIH", "BFA", "KHM", "ALB", "ARG", "A…
## $ co2_pc <dbl> 3.8875713, 0.7325204, 7.6830211, 7.0870837, 0.2729526, 1.0…
## $ PBI_pc <dbl> 5599.5853, 1804.3493, 44596.7860, 6327.0629, 739.3630, 201…
## $ energy_pc <dbl> 1714.0373, 297.1184, 4368.4456, 2270.7264, 312.9291, 501.6…
## $ urban_pop <dbl> 57.170, 39.711, 98.153, 49.841, 31.877, 25.114, 63.799, 92…
## $ countryname <chr> "Azerbaijan", "Bangladesh", "Belgium", "Bosnia and Herzego…
## $ year <dbl> 2022, 2022, 2022, 2022, 2022, 2022, 2022, 2022, 2022, 2022…
## $ wgi_cc <dbl> -1.04090738, -1.07575178, 1.49497354, -0.68431562, -0.1116…
## $ wgi_ge <dbl> -0.04120710, -0.76335531, 1.22718644, -1.06496072, -0.8531…
## [1] 136
Mapa
world <- world %>%
dplyr::mutate(
iso3 = countrycode::countrycode(country,
origin = "country.name",
destination = "iso3c")
)
world_map <- rnaturalearth::ne_countries(
scale = "medium",
returnclass = "sf"
)
map_data <- dplyr::left_join(
world_map,
world,
by = c("iso_a3" = "iso3")
)
fig <- plotly::plot_ly(
data = world_plot,
type = "choropleth",
locations = ~code,
locationmode = "ISO-3",
z = ~co2_log,
text = ~paste(
"<b>País:</b>", country,
"<br><b>CO₂ per cápita:</b>", round(co2_pc, 2), "t"
),
hoverinfo = "text",
colorscale = "Blues",
reversescale = TRUE,
marker = list(
line = list(color = "gray20", width = 0.3)
),
colorbar = list(
title = "CO₂ per cápita (t)",
tickvals = log10(c(0.1, 1, 5, 10, 20)),
ticktext = c("0.1", "1", "5", "10", "20"),
x = 1.03,
y = 0.75
)
) %>%
plotly::layout(
title = list(
text = "<b>Mapa mundial de calor de emisiones CO₂ per cápita del año 2022</b>",
x = 0.5, y = 0.97,
font = list(size = 22)
),
annotations = list(
list(
text = "Valores representados en escala logarítmica para facilitar comparación global",
x = 0.5, y = 0.91,
xref = "paper", yref = "paper",
showarrow = FALSE,
font = list(size = 16, color = "gray30")
),
list(
text = "Fuente: World Development Indicators (2023) | CO₂ per cápita (t CO₂e/persona)",
x = 0.5, y = -0.1,
xref = "paper", yref = "paper",
showarrow = FALSE,
font = list(size = 11, color = "gray40")
)
),
geo = list(
showframe = FALSE,
projection = list(type = "robinson"),
showcoastlines = TRUE,
coastlinecolor = "gray40",
showcountries = TRUE,
countrycolor = "gray50"
)
)
fig
## [1] "country" "code" "co2_pc" "PBI_pc" "energy_pc"
## [6] "urban_pop" "countryname" "year" "wgi_cc" "wgi_ge"
## [11] "iso3"
descriptivos <- world %>%
summarise(
media_ge = mean(wgi_ge),
sd_ge = sd(wgi_ge),
media_co2 = mean(co2_pc),
sd_co2 = sd(co2_pc),
media_pbi = mean(PBI_pc),
sd_pbi = sd(PBI_pc),
media_energy = mean(energy_pc),
sd_energy = sd(energy_pc),
media_urban = mean(urban_pop),
sd_urban = sd(urban_pop)
) %>%
pivot_longer(everything(),
names_to = "Medida",
values_to = "Valor")
descriptivos %>%
gt() %>%
tab_header(
title = "Estadísticos Descriptivos de las Variables del Estudio"
) %>%
fmt_number(columns = Valor, decimals = 3)
| Estadísticos Descriptivos de las Variables del Estudio | |
| Medida | Valor |
|---|---|
| media_ge | 0.080 |
| sd_ge | 0.967 |
| media_co2 | 5.182 |
| sd_co2 | 6.125 |
| media_pbi | 17,154.785 |
| sd_pbi | 21,822.907 |
| media_energy | 2,392.676 |
| sd_energy | 2,776.515 |
| media_urban | 64.898 |
| sd_urban | 21.280 |
Graficos
ggplot(world, aes(x = wgi_ge)) +
geom_histogram(bins = 30, aes(fill = ..count..), color = "white") +
scale_fill_viridis(option = "E") +
labs(
title = "Distribución del Indicador de Efectividad del Gobierno (WGI)",
x = "WGI - Efectividad del Gobierno",
y = "Frecuencia"
) +
theme_minimal(base_size = 14) +
theme(legend.position = "none")
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
ggplot(world, aes(x = co2_pc)) +
geom_histogram(binwidth = 0.2, fill = "#27AE60", color = "white") +
scale_x_log10(
labels = label_comma(),
breaks = c(0.1, 0.5, 1, 2, 5, 10, 20)
) +
labs(
title = "Distribución de las Emisiones de CO2 per cápita (escala logarítmica)",
x = "CO₂ per cápita (toneladas, escala log)",
y = "Frecuencia (número de países)"
) +
theme_minimal(base_size = 14) +
theme(
panel.grid.minor = element_blank(),
plot.title = element_text(face = "bold")
)
ggplot(world, aes(x = PBI_pc)) +
geom_histogram(bins = 30, aes(fill = ..count..), color = "white") +
scale_fill_viridis(option = "C", direction = -1) +
scale_x_log10(labels = comma_format()) +
labs(
title = "Distribución del PBI per cápita (escala logarítmica)",
x = "PBI per cápita (US$ constantes 2015, escala log)",
y = "Frecuencia",
fill = "Frecuencia"
) +
theme_minimal(base_size = 14) +
theme(
plot.title = element_text(face = "bold", size = 16),
panel.grid.minor = element_blank()
)
ggplot(world, aes(x = energy_pc)) +
geom_histogram(bins = 30, aes(fill = ..count..), color = "white") +
scale_fill_viridis(option = "D", direction = 1) +
scale_x_log10(labels = comma_format()) +
labs(
title = "Distribución del Uso de Energía per cápita (escala logarítmica)",
x = "Energía (kg de petróleo equivalente per cápita, escala log)",
y = "Frecuencia",
fill = "Frecuencia"
) +
theme_minimal(base_size = 14) +
theme(
plot.title = element_text(face = "bold", size = 16),
panel.grid.minor = element_blank()
)
ggplot(world, aes(x = urban_pop)) +
geom_histogram(bins = 30, aes(fill = ..count..), color = "white") +
scale_fill_viridis(option = "B") +
labs(
title = "Distribución del Porcentaje de Población Urbana",
x = "% población urbana",
y = "Frecuencia"
) +
theme_minimal(base_size = 14) +
theme(legend.position = "none")
ggplot(world, aes(x = wgi_cc)) +
geom_histogram(bins = 30, aes(fill = ..count..), color = "white") +
scale_fill_viridis(option = "A") +
labs(
title = "Distribución del Indicador de Control de Corrupción (WGI)",
x = "WGI - Control de Corrupción",
y = "Frecuencia"
) +
theme_minimal(base_size = 14) +
theme(legend.position = "none")
Analisis Bivariado
En base a lo anteriormente visto se usarán las variables pertinentes como variables logaritmicas
world <- world %>%
filter(co2_pc > 0,
PBI_pc > 0,
energy_pc > 0) %>%
mutate(
co2_pc = log(co2_pc),
PBI_pc = log(PBI_pc),
energy_pc = log(energy_pc)
)
Co2 vs PBI_pc
cor_PBI <- stats::cor(world$co2_pc, world$PBI_pc)
cor_PBI
## [1] 0.8311698
tabla_cor_PBI <- dplyr::tibble(
variable_dependiente = "co2_pc",
variable_independiente = "PBI_pc",
correlacion_pearson = cor_PBI
)
cor_PBI <- stats::cor(world$co2_pc, world$PBI_pc)
tabla_co2_pbi <- dplyr::tibble(
variable_dependiente = "CO2 per cápita",
variable_independiente = "PBI per cápita",
correlacion_pearson = cor_PBI
)
gt::gt(tabla_co2_pbi) %>%
gt::fmt_number(columns = correlacion_pearson, decimals = 3) %>%
gt::tab_header(title = "Correlación: CO₂ vs PBI per cápita")
| Correlación: CO₂ vs PBI per cápita | ||
| variable_dependiente | variable_independiente | correlacion_pearson |
|---|---|---|
| CO2 per cápita | PBI per cápita | 0.831 |
gráfico 1.2
g_PBI <- ggplot2::ggplot(world, ggplot2::aes(x = PBI_pc, y = co2_pc)) +
ggplot2::geom_point(alpha = 0.7, color = viridis::viridis(1, option = "D")) +
ggplot2::geom_smooth(method = "lm", se = TRUE, color = "black") +
ggplot2::scale_x_continuous(labels = scales::label_comma()) +
ggplot2::scale_y_continuous(labels = scales::label_comma()) +
ggplot2::labs(
title = "CO₂ per cápita vs PBI per cápita",
x = "PBI per cápita (US$ constantes 2015)",
y = "CO₂ per cápita (t CO₂e/cápita)"
) +
ggplot2::theme_minimal(base_size = 14)
g_PBI
## `geom_smooth()` using formula = 'y ~ x'
2. CO2 vs energy_pc
cor_energy <- cor(world$co2_pc, world$energy_pc)
cor_energy
## [1] 0.9273561
tabla_cor_energy <- tibble::tibble(
variable_dependiente = "co2_pc",
variable_independiente = "energy_pc",
correlacion_pearson = cor_energy
)
tabla_cor_energy %>%
gt::gt() %>%
gt::fmt_number(columns = correlacion_pearson, decimals = 3) %>%
gt::tab_header(
title = "Correlación entre CO₂ per cápita y uso de energía per cápita"
)
| Correlación entre CO₂ per cápita y uso de energía per cápita | ||
| variable_dependiente | variable_independiente | correlacion_pearson |
|---|---|---|
| co2_pc | energy_pc | 0.927 |
g_energy <- ggplot2::ggplot(world, ggplot2::aes(x = energy_pc, y = co2_pc)) +
ggplot2::geom_point(alpha = 0.7, color = viridis::viridis(1, option = "E")) +
ggplot2::geom_smooth(method = "lm", se = TRUE, color = "black") +
ggplot2::scale_x_continuous(labels = scales::label_comma()) +
ggplot2::scale_y_continuous(labels = scales::label_comma()) +
ggplot2::labs(
title = "CO₂ per cápita vs Uso de energía per cápita",
x = "Uso de energía per cápita (kg de equivalente de petróleo)",
y = "CO₂ per cápita (t CO₂e/cápita)"
) +
ggplot2::theme_minimal(base_size = 14)
g_energy
## `geom_smooth()` using formula = 'y ~ x'
cor_urban <- stats::cor(world$co2_pc, world$urban_pop)
cor_urban
## [1] 0.7071991
tabla_cor_urban <- dplyr::tibble(
variable_dependiente = "co2_pc",
variable_independiente = "urban_pop",
correlacion_pearson = cor_urban
)
gt::gt(tabla_cor_urban) %>%
gt::fmt_number(columns = correlacion_pearson, decimals = 3) %>%
gt::tab_header(
title = "Correlación entre CO₂ per cápita y Población urbana"
)
| Correlación entre CO₂ per cápita y Población urbana | ||
| variable_dependiente | variable_independiente | correlacion_pearson |
|---|---|---|
| co2_pc | urban_pop | 0.707 |
g_urban <- ggplot2::ggplot(world, ggplot2::aes(x = urban_pop, y = co2_pc)) +
ggplot2::geom_point(alpha = 0.7, color = viridis::viridis(1, option = "C")) +
ggplot2::geom_smooth(method = "lm", se = TRUE, color = "black") +
ggplot2::scale_x_continuous(labels = scales::label_comma()) +
ggplot2::scale_y_continuous(labels = scales::label_comma()) +
ggplot2::labs(
title = "CO₂ per cápita vs Población urbana",
x = "Población urbana (% del total)",
y = "CO₂ per cápita (t CO₂e/cápita)"
) +
ggplot2::theme_minimal(base_size = 14)
g_urban
## `geom_smooth()` using formula = 'y ~ x'
cor_wgi_cc <- stats::cor(world$co2_pc, world$wgi_cc)
cor_wgi_cc
## [1] 0.5124914
tabla_cor_wgi_cc <- dplyr::tibble(
variable_dependiente = "co2_pc",
variable_independiente = "wgi_cc",
correlacion_pearson = cor_wgi_cc
)
gt::gt(tabla_cor_wgi_cc) %>%
gt::fmt_number(columns = correlacion_pearson, decimals = 3) %>%
gt::tab_header(
title = "Correlación entre CO₂ per cápita y Control de la Corrupción"
)
| Correlación entre CO₂ per cápita y Control de la Corrupción | ||
| variable_dependiente | variable_independiente | correlacion_pearson |
|---|---|---|
| co2_pc | wgi_cc | 0.512 |
g_wgi_cc <- ggplot2::ggplot(world, ggplot2::aes(x = wgi_cc, y = co2_pc)) +
ggplot2::geom_point(alpha = 0.7, color = viridis::viridis(1, option = "F")) +
ggplot2::geom_smooth(method = "lm", se = TRUE, color = "black") +
ggplot2::scale_x_continuous(labels = scales::label_comma()) +
ggplot2::scale_y_continuous(labels = scales::label_comma()) +
ggplot2::labs(
title = "CO₂ per cápita vs Control de la Corrupción",
x = "Control de la Corrupción (WGI_CC)",
y = "CO₂ per cápita (t CO₂e/cápita)"
) +
ggplot2::theme_minimal(base_size = 14)
g_wgi_cc
## `geom_smooth()` using formula = 'y ~ x'
cor_wgi_ge <- stats::cor(world$co2_pc, world$wgi_ge)
cor_wgi_ge
## [1] 0.5867898
tabla_cor_wgi_ge <- dplyr::tibble(
variable_dependiente = "co2_pc",
variable_independiente = "wgi_ge",
correlacion_pearson = cor_wgi_ge
)
gt::gt(tabla_cor_wgi_ge) %>%
gt::fmt_number(columns = correlacion_pearson, decimals = 3) %>%
gt::tab_header(
title = "Correlación entre CO₂ per cápita y Gobierno Efectivo"
)
| Correlación entre CO₂ per cápita y Gobierno Efectivo | ||
| variable_dependiente | variable_independiente | correlacion_pearson |
|---|---|---|
| co2_pc | wgi_ge | 0.587 |
g_wgi_ge <- ggplot2::ggplot(world, ggplot2::aes(x = wgi_ge, y = co2_pc)) +
ggplot2::geom_point(alpha = 0.7, color = viridis::viridis(1, option = "A")) +
ggplot2::geom_smooth(method = "lm", se = TRUE, color = "black") +
ggplot2::scale_x_continuous(labels = scales::label_comma()) +
ggplot2::scale_y_continuous(labels = scales::label_comma()) +
ggplot2::labs(
title = "CO₂ per cápita vs Gobierno Efectivo",
x = "Gobierno Efectivo (WGI_GE)",
y = "CO₂ per cápita (t CO₂e/cápita)"
) +
ggplot2::theme_minimal(base_size = 14)
g_wgi_ge
## `geom_smooth()` using formula = 'y ~ x'
Regresión Gauss
##
## Call:
## stats::lm(formula = co2_pc ~ wgi_cc + wgi_ge, data = world)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.09271 -0.53770 -0.02201 0.57872 2.60641
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.90395 0.09218 9.806 < 2e-16 ***
## wgi_cc -0.36733 0.24717 -1.486 0.14
## wgi_ge 1.14168 0.26157 4.365 2.53e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.037 on 133 degrees of freedom
## Multiple R-squared: 0.355, Adjusted R-squared: 0.3453
## F-statistic: 36.61 on 2 and 133 DF, p-value: 2.159e-13
##
## Call:
## stats::lm(formula = co2_pc ~ wgi_cc + wgi_ge + PBI_pc, data = world)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4869 -0.4721 -0.0290 0.4744 1.3927
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.5960 0.6437 -13.355 < 2e-16 ***
## wgi_cc -0.6359 0.1531 -4.153 5.86e-05 ***
## wgi_ge 0.1388 0.1746 0.795 0.428
## PBI_pc 1.0727 0.0724 14.816 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.638 on 132 degrees of freedom
## Multiple R-squared: 0.7578, Adjusted R-squared: 0.7523
## F-statistic: 137.7 on 3 and 132 DF, p-value: < 2.2e-16
##
## Call:
## stats::lm(formula = co2_pc ~ wgi_cc + wgi_ge + PBI_pc + urban_pop,
## data = world)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.39408 -0.46346 -0.02925 0.45980 1.29372
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.024233 0.712238 -11.266 < 2e-16 ***
## wgi_cc -0.657881 0.152297 -4.320 3.06e-05 ***
## wgi_ge 0.214049 0.177992 1.203 0.2313
## PBI_pc 0.953516 0.097449 9.785 < 2e-16 ***
## urban_pop 0.007488 0.004140 1.809 0.0728 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6326 on 131 degrees of freedom
## Multiple R-squared: 0.7637, Adjusted R-squared: 0.7565
## F-statistic: 105.9 on 4 and 131 DF, p-value: < 2.2e-16
modelo_4 <- stats::lm(
formula = co2_pc ~ wgi_cc + wgi_ge + PBI_pc + urban_pop + energy_pc,
data = world
)
summary(modelo_4)
##
## Call:
## stats::lm(formula = co2_pc ~ wgi_cc + wgi_ge + PBI_pc + urban_pop +
## energy_pc, data = world)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.3973 -0.2132 0.0440 0.2714 0.8557
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.753801 0.481486 -18.181 < 2e-16 ***
## wgi_cc -0.463281 0.103363 -4.482 1.6e-05 ***
## wgi_ge 0.193093 0.119475 1.616 0.108479
## PBI_pc 0.300214 0.083259 3.606 0.000442 ***
## urban_pop 0.004980 0.002786 1.787 0.076195 .
## energy_pc 0.924533 0.072908 12.681 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4246 on 130 degrees of freedom
## Multiple R-squared: 0.8944, Adjusted R-squared: 0.8903
## F-statistic: 220.1 on 5 and 130 DF, p-value: < 2.2e-16
##
## Call:
## stats::lm(formula = co2_pc ~ wgi_cc + PBI_pc + energy_pc, data = world)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.3893 -0.2035 0.0242 0.2975 0.8131
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.33192 0.40520 -23.031 < 2e-16 ***
## wgi_cc -0.34506 0.06216 -5.551 1.50e-07 ***
## PBI_pc 0.39575 0.07100 5.574 1.35e-07 ***
## energy_pc 0.93353 0.07347 12.707 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4289 on 132 degrees of freedom
## Multiple R-squared: 0.8905, Adjusted R-squared: 0.8881
## F-statistic: 358 on 3 and 132 DF, p-value: < 2.2e-16
Elección de regresión
## Analysis of Variance Table
##
## Model 1: co2_pc ~ wgi_cc + wgi_ge
## Model 2: co2_pc ~ wgi_cc + wgi_ge + PBI_pc
## Model 3: co2_pc ~ wgi_cc + wgi_ge + PBI_pc + urban_pop
## Model 4: co2_pc ~ wgi_cc + wgi_ge + PBI_pc + urban_pop + energy_pc
## Model 5: co2_pc ~ wgi_cc + PBI_pc + energy_pc
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 133 143.097
## 2 132 53.734 1 89.364 495.7087 < 2.2e-16 ***
## 3 131 52.425 1 1.309 7.2608 0.007978 **
## 4 130 23.436 1 28.989 160.8044 < 2.2e-16 ***
## 5 132 24.285 -2 -0.850 2.3565 0.098782 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Diagnostico del modelo 4
mod_final <- lm(co2_pc ~ wgi_cc + wgi_ge + PBI_pc + urban_pop + energy_pc, data = world)
Linealidad
plot(mod_final, 1,
col = "#4B4B4B",
pch = 19,
cex = 0.8,
main = "Evaluando Linealidad - Modelo 4")
Homocedasticidad
bp_mod4 <- bptest(mod_final)
data.frame(
BP = bp_mod4$statistic,
df = bp_mod4$parameter,
p_value = bp_mod4$p.value
) %>%
kable(caption = bp_mod4$method) %>%
kable_styling(full_width = FALSE)
| BP | df | p_value | |
|---|---|---|---|
| BP | 15.62326 | 5 | 0.0080062 |
plot(mod_final, 3,
col = "blue",
pch = 19,
cex = 0.8,
main = "Evaluando Homocedasticidad - Modelo 4")
Normalidad de residuos
sw_mod4 <- shapiro.test(mod_final$residuals)
data.frame(
W = sw_mod4$statistic,
p_value = sw_mod4$p.value
) %>%
kable(caption = sw_mod4$method) %>%
kable_styling(full_width = FALSE)
| W | p_value | |
|---|---|---|
| W | 0.9662635 | 0.0018936 |
plot(mod_final, 2,
col = "#4B4B4B",
pch = 19,
cex = 0.8,
main = "Evaluando Normalidad de Residuos - Modelo 4")
sw_mod4 <- shapiro.test(mod_final$residuals)
data.frame(
W = sw_mod4$statistic,
p_value = sw_mod4$p.value
) %>%
kable(caption = sw_mod4$method) %>%
kable_styling(full_width = FALSE)
| W | p_value | |
|---|---|---|
| W | 0.9662635 | 0.0018936 |
Multicolinealidad
vif_mod4 <- VIF(mod_final)
data.frame(VIF = vif_mod4) %>%
kable(caption = "Evaluación de Multicolinealidad (VIF) - Modelo 4") %>%
kable_styling(full_width = FALSE)
| VIF | |
|---|---|
| wgi_cc | 8.377627 |
| wgi_ge | 9.994299 |
| PBI_pc | 9.817995 |
| urban_pop | 2.631879 |
| energy_pc | 4.022312 |
Valores Influyentes
infl_mod4 <- as.data.frame(influence.measures(mod_final)$is.inf)
infl_mod4[infl_mod4$cook.d | infl_mod4$hat, c("cook.d","hat")] %>%
kable(caption = "Valores Influyentes Críticos - Modelo 4") %>%
kable_styling(full_width = FALSE)
| cook.d | hat |
|---|---|
| NA | NA |
| :—— | :— |
Analisis factorial
corMatrix <- cor(theData[, efa_vars], use = "pairwise.complete.obs")
round(corMatrix, 2)
## co2_pc PBI_pc energy_pc urban_pop wgi_cc wgi_ge
## co2_pc 1.00 0.83 0.93 0.71 0.51 0.59
## PBI_pc 0.83 1.00 0.86 0.76 0.80 0.83
## energy_pc 0.93 0.86 1.00 0.69 0.61 0.66
## urban_pop 0.71 0.76 0.69 1.00 0.54 0.53
## wgi_cc 0.51 0.80 0.61 0.54 1.00 0.94
## wgi_ge 0.59 0.83 0.66 0.53 0.94 1.00
fa.parallel(theData, fa = 'fa',correct = T,plot = F)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Parallel analysis suggests that the number of factors = 2 and the number of components = NA
psych::KMO(corMatrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA = 0.81
## MSA for each item =
## co2_pc PBI_pc energy_pc urban_pop wgi_cc wgi_ge
## 0.76 0.89 0.82 0.88 0.74 0.77
cortest.bartlett(corMatrix, n = nrow(theData))
## $chisq
## [1] 1040.848
##
## $p.value
## [1] 2.35711e-212
##
## $df
## [1] 15
is.singular.matrix(corMatrix)
## [1] FALSE
fa.parallel(theData[, efa_vars],
fa = "fa",
correct = TRUE,
plot = TRUE)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Parallel analysis suggests that the number of factors = 2 and the number of components = NA
resfa <- fa(theData[, efa_vars],
nfactors = 2,
cor = "mixed",
rotate = "varimax",
fm = "minres")
print(resfa$loadings)
##
## Loadings:
## MR1 MR2
## co2_pc 0.944 0.243
## PBI_pc 0.740 0.626
## energy_pc 0.868 0.364
## urban_pop 0.670 0.350
## wgi_cc 0.304 0.938
## wgi_ge 0.384 0.869
##
## MR1 MR2
## SS loadings 2.881 2.341
## Proportion Var 0.480 0.390
## Cumulative Var 0.480 0.870
print(resfa$loadings, cutoff = 0.5)
##
## Loadings:
## MR1 MR2
## co2_pc 0.944
## PBI_pc 0.740 0.626
## energy_pc 0.868
## urban_pop 0.670
## wgi_cc 0.938
## wgi_ge 0.869
##
## MR1 MR2
## SS loadings 2.881 2.341
## Proportion Var 0.480 0.390
## Cumulative Var 0.480 0.870
fa.diagram(resfa,
main = "Resultados del EFA - Factores institucionales y energéticos")
sort(resfa$communality)
## urban_pop energy_pc wgi_ge PBI_pc co2_pc wgi_cc
## 0.5713920 0.8864492 0.9024824 0.9391297 0.9495300 0.9728354
sort(resfa$complexity)
## co2_pc wgi_cc energy_pc wgi_ge urban_pop PBI_pc
## 1.132319 1.207837 1.341759 1.377274 1.509383 1.946066
resfa$TLI
## [1] 0.8733423
resfa$rms
## [1] 0.01823503
resfa$RMSEA
## RMSEA lower upper confidence
## 0.2509692 0.1829350 0.3276580 0.9000000
resfa$BIC
## [1] 18.64315
head(resfa$scores)
## MR1 MR2
## [1,] 0.4738323 -1.0486713
## [2,] -0.9328441 -0.7087230
## [3,] 0.6088820 1.2910130
## [4,] 0.9989459 -1.0885909
## [5,] -2.0240202 0.3141251
## [6,] -0.6764434 -0.8682235
theData$Factor_1 <- resfa$scores[, 1]
theData$Factor_2 <- resfa$scores[, 2]
world <- world %>%
left_join(
theData[, c("idx_row", "Factor_1", "Factor_2")],
by = "idx_row"
)
world <- world %>%
rename(
F_institucional = Factor_1,
F_energia_desarrollo = Factor_2
)
Cluster
library(ggplot2)
ggplot(cluster_data,
aes(x = F_institucional,
y = F_energia_desarrollo,
color = cluster_k3,
label = country)) +
geom_point(size = 2) +
theme_minimal() +
labs(
x = "Factor institucional",
y = "Factor energía y desarrollo",
color = "Clúster k-means",
title = "Agrupación de países según factores institucionales y de energía/desarrollo"
)
cluster_resumen <- cluster_data %>%
group_by(cluster_k3) %>%
summarise(
n_paises = n(),
prom_F_institucional = mean(F_institucional),
prom_F_energia_desarrollo = mean(F_energia_desarrollo),
.groups = "drop"
)
cluster_resumen
## # A tibble: 3 × 4
## cluster_k3 n_paises prom_F_institucional prom_F_energia_desarrollo
## <fct> <int> <dbl> <dbl>
## 1 1 50 0.646 -0.719
## 2 2 41 0.412 1.21
## 3 3 45 -1.09 -0.299